/* File 2 of 3 to create CCK results. */



#delimit ;
* Every command from here on ends with a semicolon;

* Start from an empty session and stop output from pausing;
clear all;
set more off;

* Stub used to build the name of the analysis file saved further down;
local outfile "GWgapr10_CCKdecomp";

* Time stamp for the log;
display _newline "$S_DATE $S_TIME";


* Quartiles of wfe_t, saved with snz_uid in temp_wfeq for merging later;

use GWgap_2wFE_extract_v4_wfe_t, clear;
xtile wfe_t_izi_q = wfe_t, nquantiles(4);
keep snz_uid wfe_t_izi_q;
save temp_wfeq, replace;





*******************************************************************************;
* Preparing the data and normalising the FE;
*******************************************************************************;

* Main analysis file: only rows flagged Q_rest==1 are loaded;

use if Q_rest==1 using GWgap_pr_IDI_v4, clear;
drop num_pents multiplant ind4 anz06_4d lnWpm num_pents_cat indiv_avfte age;

* Tenure code is the sum of wkd_hpp_1ya and wkd_hpp_2ya, labelled 0 to 2;

label define tenure
	0 "First year at firm"
	1 "Second year at firm"
	2 "Third or subsequent year at firm";
generate tenure = wkd_hpp_1ya + wkd_hpp_2ya;
label values tenure tenure;
drop wkd_hpp_1ya wkd_hpp_2ya;


* Worker effect quartiles: unmatched rows of the analysis file are retained;

merge m:1 snz_uid using temp_wfeq, keep(master match) nogenerate;

* Firm production data are keyed on pent and year, so hp_pent is renamed
  for this merge only and restored straight afterwards. Only matched rows
  are retained;

rename hp_pent pent;
merge m:1 pent year using GWgap_pr_firm_v4, keep(match) nogenerate keepusing(lngo lnM);
rename pent hp_pent;

* HLFS hours and household variables: unmatched rows of the analysis file
  are retained;

merge 1:1 snz_uid year using GWgap_pr_HLFS_v4, keep(master match) nogenerate
	keepusing(hrs_main_HLFS Q_rest_HLFS hqual numkid_u18_cat hhcomp_HLFS);

* HLFS variables are blanked for every row where Q_rest_HLFS is not 1,
  which includes rows that found no HLFS match;

foreach hvar in hrs_main_HLFS hqual numkid_u18_cat hhcomp_HLFS {;
	replace `hvar' = . if Q_rest_HLFS != 1;
};

* Value added per worker: (exp(lngo) - exp(lnM)) divided by L_hc, then
  rescaled by 10000. The result is missing when L_hc is zero or missing;

gen val_ad_pw = (exp(lngo) - exp(lnM))/(L_hc);
replace val_ad_pw = val_ad_pw/10000;

* The dollar sign in the label is escaped with a backslash. Unescaped, a
  dollar sign followed by 0 is read as a global macro reference and the
  characters vanish from the stored label;
label var val_ad_pw "Value added per worker (real \$0,000s)";

* Log of value added per worker on the unscaled basis. Missing whenever
  value added is zero or negative;
gen lnval_ad_pw = ln(10000*val_ad_pw);
label var lnval_ad_pw "Value added per worker (ln)";



* Normalise the firm fixed effects ffe_m and ffe_f: subtract the mean of
  each effect over rows with lnval_ad_pw below the threshold, frequency
  weighted by rounded L_hc. Results go to ffe_mnva and ffe_fnva;

#delimit ;
gen L_hc_rdd = round(L_hc);

* 9.95 is optimal T from CCKnorm;
local va_cut 9.95;

foreach sex in m f {;
	tempname base;
	quietly summarize ffe_`sex' if lnval_ad_pw<`va_cut' [fweight = L_hc_rdd];
	scalar `base' = r(mean);
	gen ffe_`sex'nva = ffe_`sex' - `base';
};


* Log earnings measures: annual earnings over FTEs and over HLFS weekly hours;

generate lnearn_fte = ln(max_gross_earn_yr/max_fte_employee_av);
generate lnearn_hrs = ln(max_gross_earn_yr/hrs_main_HLFS);

label variable lnearn_fte "Earnings at highest paying pent/FTEs (ln)";
label variable lnearn_hrs "Earnings at highest paying pent/weekly hours (ln)";


* Copies of the grouping variables that are non-missing only where
  Q_rest_HLFS equals 1;

generate tenure_HLFS = tenure if Q_rest_HLFS==1;
generate wfe_t_izi_q_HLFS = wfe_t_izi_q if Q_rest_HLFS==1;

* Analysis file read back by the decomposition program;

save "`outfile'_decompdat", replace;




*******************************************************************************;
* decompositions;
*******************************************************************************;

#delimit ;

program drop _all;

* decomps: print one row of the gender wage gap decomposition for a subsample.

  Positional arguments
    1  if-condition selecting the subsample, passed in double quotes
    2  suffix of the normalised firm effects, so va selects ffe_mnva and ffe_fnva
    3  name of the log wage variable to decompose

  The program reloads GWgapr10_CCKdecomp_decompdat, so data in memory are
  replaced. Rows are restricted to those where the wage variable and both
  firm effects are non-missing. Without that restriction collapse would
  average each variable over its own non-missing rows, and the wage gap
  and the firm effect components would describe different samples.

  After the collapse row 1 is female==0 and row 2 is female==1. Printed
  columns: wgap is the row 1 minus row 2 wage mean. firm, sortm, sortf,
  bargm and bargf are differences of the mean firm effects as coded below,
  and each *sh column is that difference as a percentage of wgap;

program define decomps;
   args cond sfx wage;

   use GWgapr10_CCKdecomp_decompdat
      if (`cond') & !missing(`wage', ffe_mn`sfx', ffe_fn`sfx'), clear;
   collapse `wage' ffe_mn`sfx' ffe_fn`sfx', by(female);

   sort female;

   * Positional indexing below needs exactly one row per sex. Warn, but do
     not stop, when the subsample does not deliver that;
   if _N != 2 {;
      di as error "decomps: expected 2 rows after collapse by female, found " _N;
   };

   gen mwage = `wage'[1];
   gen fwage = `wage'[2];
   gen wgap = `wage'[1]-`wage'[2];

   * Full variable names are used so the program does not depend on
     variable abbreviation being switched on;
   gen feM_males   = ffe_mn`sfx'[1];
   gen feM_females = ffe_mn`sfx'[2];
   gen feF_males   = ffe_fn`sfx'[1];
   gen feF_females = ffe_fn`sfx'[2];

   gen firm  = feM_males - feF_females;
   gen sortm = feM_males - feM_females;
   gen sortf = feF_males - feF_females;
   gen bargm = feM_males - feF_males;
   gen bargf = feM_females - feF_females;

   foreach v in firm sortm sortf bargm bargf {;
      gen `v'sh = (`v' / wgap)*100;
   };

   * All results are constants, so a single row carries everything;
   qui keep if _n==1;
   list wgap feM_males feF_females firm firmsh sortm sortmsh sortf sortfsh bargm bargmsh bargf
	bargfsh feM_females feF_males mwage fwage, noobs;
end;


* CCK table 3 - paste these to a text file;

* Each pair below prints a heading and then one decomposition row for the
  subsample named in the heading, using the va normalised firm effects and
  ln(earnings/FTEs). Headings must not contain a semicolon because it would
  end the command while the semicolon delimiter is in force;

* IDI Overall;

di "CCK decomposition for IDI restricted sample, with earnings measure ln(earnings/FTEs)";
decomps "female!=." va lnearn_fte;

* IDI by age category;

di "CCK decomposition for IDI restricted sample, with earnings measure ln(earnings/FTEs): age <25";
decomps "age_cat==1" va lnearn_fte;

di "CCK decomposition for IDI restricted sample, with earnings measure ln(earnings/FTEs): ages 25-39";
decomps "age_cat==2" va lnearn_fte;

di "CCK decomposition for IDI restricted sample, with earnings measure ln(earnings/FTEs): ages 40 to 54";
decomps "age_cat==3" va lnearn_fte;

di "CCK decomposition for IDI restricted sample, with earnings measure ln(earnings/FTEs): ages 55+";
decomps "age_cat==4" va lnearn_fte;

* IDI by tenure;

di "CCK decomposition for IDI restricted sample, with earnings measure ln(earnings/FTEs): first year at firm";
decomps "tenure == 0" va lnearn_fte;

di "CCK decomposition for IDI restricted sample, with earnings measure ln(earnings/FTEs): second year at firm";
decomps "tenure == 1" va lnearn_fte;

di "CCK decomposition for IDI restricted sample, with earnings measure ln(earnings/FTEs): third or subsequent year at firm";
decomps "tenure == 2" va lnearn_fte;

* IDI by quantile of worker FE;

di "CCK decomposition for IDI restricted sample, with earnings measure ln(earnings/FTEs): wfe quartile 1";
decomps "wfe_t_izi_q == 1" va lnearn_fte;

di "CCK decomposition for IDI restricted sample, with earnings measure ln(earnings/FTEs): wfe quartile 2";
decomps "wfe_t_izi_q == 2" va lnearn_fte;

di "CCK decomposition for IDI restricted sample, with earnings measure ln(earnings/FTEs): wfe quartile 3";
decomps "wfe_t_izi_q == 3" va lnearn_fte;

di "CCK decomposition for IDI restricted sample, with earnings measure ln(earnings/FTEs): wfe quartile 4";
decomps "wfe_t_izi_q == 4" va lnearn_fte;


***;
* HLFS decompositions, using the va normalised firm effects and
  ln(earnings/hours). Every condition selects rows of the HLFS restricted
  sample only: the overall and age conditions test Q_rest_HLFS directly,
  and the remaining grouping variables are missing outside that sample.
  Headings must not contain a semicolon because it would end the command
  while the semicolon delimiter is in force;

* HLFS overall;

di "CCK decomposition for HLFS restricted sample, with earnings measure ln(earnings/hours)";
decomps "Q_rest_HLFS==1 & female!=." va lnearn_hrs;

* HLFS by age category;

di "CCK decomposition for HLFS restricted sample, with earnings measure ln(earnings/hours): age <25";
decomps "Q_rest_HLFS==1 & age_cat==1" va lnearn_hrs;

di "CCK decomposition for HLFS restricted sample, with earnings measure ln(earnings/hours): ages 25-39";
decomps "Q_rest_HLFS==1 & age_cat==2" va lnearn_hrs;

di "CCK decomposition for HLFS restricted sample, with earnings measure ln(earnings/hours): ages 40 to 54";
decomps "Q_rest_HLFS==1 & age_cat==3" va lnearn_hrs;

di "CCK decomposition for HLFS restricted sample, with earnings measure ln(earnings/hours): ages 55+";
decomps "Q_rest_HLFS==1 & age_cat==4" va lnearn_hrs;

* HLFS by tenure;

di "CCK decomposition for HLFS restricted sample, with earnings measure ln(earnings/hours): first year at firm";
decomps "tenure_HLFS == 0" va lnearn_hrs;

di "CCK decomposition for HLFS restricted sample, with earnings measure ln(earnings/hours): second year at firm";
decomps "tenure_HLFS == 1" va lnearn_hrs;

di "CCK decomposition for HLFS restricted sample, with earnings measure ln(earnings/hours): third or subsequent year at firm";
decomps "tenure_HLFS == 2" va lnearn_hrs;

* HLFS by qualifications;

di "CCK decomposition for HLFS restricted sample, with earnings measure ln(earnings/hours): no quals";
decomps "hqual == 0" va lnearn_hrs;

di "CCK decomposition for HLFS restricted sample, with earnings measure ln(earnings/hours): school quals";
decomps "hqual == 1" va lnearn_hrs;

di "CCK decomposition for HLFS restricted sample, with earnings measure ln(earnings/hours): postschool quals";
decomps "hqual == 2" va lnearn_hrs;

di "CCK decomposition for HLFS restricted sample, with earnings measure ln(earnings/hours): degree";
decomps "hqual == 3" va lnearn_hrs;

* HLFS by household composition. The variable is named in full as
  hhcomp_HLFS so the condition does not rely on variable abbreviation;

di "CCK decomposition for HLFS restricted sample, with earnings measure ln(earnings/hours): single, no child";
decomps "hhcomp_HLFS == 1" va lnearn_hrs;

di "CCK decomposition for HLFS restricted sample, with earnings measure ln(earnings/hours): couple, no child";
decomps "hhcomp_HLFS == 2" va lnearn_hrs;

di "CCK decomposition for HLFS restricted sample, with earnings measure ln(earnings/hours): single, children";
decomps "hhcomp_HLFS == 3" va lnearn_hrs;

di "CCK decomposition for HLFS restricted sample, with earnings measure ln(earnings/hours): couple, children";
decomps "hhcomp_HLFS == 4" va lnearn_hrs;

* HLFS by number of children under 18;

di "CCK decomposition for HLFS restricted sample, with earnings measure ln(earnings/hours): no children";
decomps "numkid_u18_cat == 0" va lnearn_hrs;

di "CCK decomposition for HLFS restricted sample, with earnings measure ln(earnings/hours): one child";
decomps "numkid_u18_cat == 1" va lnearn_hrs;

di "CCK decomposition for HLFS restricted sample, with earnings measure ln(earnings/hours): two children";
decomps "numkid_u18_cat == 2" va lnearn_hrs;

di "CCK decomposition for HLFS restricted sample, with earnings measure ln(earnings/hours): three or more children";
decomps "numkid_u18_cat == 3" va lnearn_hrs;

* HLFS by quantile of combined worker fixed effects;

di "CCK decomposition for HLFS restricted sample, with earnings measure ln(earnings/hours): wfe quartile 1";
decomps "wfe_t_izi_q_HLFS == 1" va lnearn_hrs;

di "CCK decomposition for HLFS restricted sample, with earnings measure ln(earnings/hours): wfe quartile 2";
decomps "wfe_t_izi_q_HLFS == 2" va lnearn_hrs;

di "CCK decomposition for HLFS restricted sample, with earnings measure ln(earnings/hours): wfe quartile 3";
decomps "wfe_t_izi_q_HLFS == 3" va lnearn_hrs;

di "CCK decomposition for HLFS restricted sample, with earnings measure ln(earnings/hours): wfe quartile 4";
decomps "wfe_t_izi_q_HLFS == 4" va lnearn_hrs;


